import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import geopandas as gpd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
#ACS data for population
#2022
url = 'https://api.census.gov/data/2022/acs/acs1?'
params = {
'get': 'NAME,B01001_001E',
'for': 'state:*'
}
r = requests.get(url, params=params).json()
df = pd.DataFrame(r).drop(0).fillna(0)
df = df.rename(columns={0: 'State', 1: "Population"})
pop_2022=df
#2021
url = 'https://api.census.gov/data/2021/acs/acs1?'
params = {
'get': 'NAME,B01001_001E',
'for': 'state:*'
}
r = requests.get(url, params=params).json()
df = pd.DataFrame(r).drop(0).fillna(0)
df = df.rename(columns={0: 'State', 1: "Population"})
pop_2021=df
#2020
df = pd.read_excel("C:/Users/thoma/Downloads/XK200101.xlsx")
state_names = df.iloc[5].tolist()[1::2]
populations = df.iloc[7].tolist()[1::2]
df_new = pd.DataFrame({'State': state_names, 'Population': populations})
pop_2020=df_new
#2019
url = 'https://api.census.gov/data/2019/acs/acs1?'
params = {
'get': 'NAME,B01001_001E',
'for': 'state:*'
}
r = requests.get(url, params=params).json()
df = pd.DataFrame(r).drop(0).fillna(0)
df = df.rename(columns={0: 'State', 1: "Population"})
pop_2019=df
# merged_pop
merged_pop = pd.merge(pop_2022, pop_2021, on='State', suffixes=('_2022', '_2021'))
merged_pop = pd.merge(merged_pop, pop_2020, on='State')
merged_pop = pd.merge(merged_pop, pop_2019, on='State', suffixes=('_2020', '_2019'))
#2019 , from bls for employment
url = "https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm#type=0&year=2019&qtr=A&own=0&ind=10&supp=0"
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.verdana.md tbody tr')))
html_content = driver.page_source
driver.quit()
soup = BeautifulSoup(html_content, "html.parser")
rows = soup.select('.verdana.md tbody tr')
data = []
for row in rows:
cells = row.find_all(['th', 'td'])
if len(cells) >= 3:
state = cells[0].get_text(strip=True)
annual_avg_employment = cells[2].get_text(strip=True)
data.append({'State': state, '2019 Employment': annual_avg_employment})
df_2019 = pd.DataFrame(data)
#2020
url="https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm#type=0&year=2020&qtr=A&own=0&ind=10&supp=0"
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.verdana.md tbody tr')))
html_content = driver.page_source
driver.quit()
soup = BeautifulSoup(html_content, "html.parser")
rows = soup.select('.verdana.md tbody tr')
data = []
for row in rows:
cells = row.find_all(['th', 'td'])
if len(cells) >= 3:
state = cells[0].get_text(strip=True)
annual_avg_employment = cells[2].get_text(strip=True)
data.append({'State': state, '2020 Employment': annual_avg_employment})
df_2020 = pd.DataFrame(data)
#2021
url="https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm#type=0&year=2021&qtr=A&own=0&ind=10&supp=0"
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.verdana.md tbody tr')))
html_content = driver.page_source
driver.quit()
soup = BeautifulSoup(html_content, "html.parser")
rows = soup.select('.verdana.md tbody tr')
data = []
for row in rows:
cells = row.find_all(['th', 'td'])
if len(cells) >= 3:
state = cells[0].get_text(strip=True)
annual_avg_employment = cells[2].get_text(strip=True)
data.append({'State': state, '2021 Employment': annual_avg_employment})
df_2021 = pd.DataFrame(data)
#2022
url="https://data.bls.gov/cew/apps/table_maker/v4/table_maker.htm#type=0&year=2022&qtr=A&own=0&ind=10&supp=0"
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.verdana.md tbody tr')))
html_content = driver.page_source
driver.quit()
soup = BeautifulSoup(html_content, "html.parser")
rows = soup.select('.verdana.md tbody tr')
data = []
for row in rows:
cells = row.find_all(['th', 'td'])
if len(cells) >= 3:
state = cells[0].get_text(strip=True)
annual_avg_employment = cells[2].get_text(strip=True)
data.append({'State': state, '2022 Employment': annual_avg_employment})
df_2022 = pd.DataFrame(data)
#merged of bls
merged_df = pd.merge(df_2019, df_2020, on='State', how='inner', suffixes=('_2019', '_2020'))
merged_df = pd.merge(merged_df, df_2021, on='State', how='inner', suffixes=('_2020', '_2021'))
merged_df = pd.merge(merged_df, df_2022, on='State', how='inner', suffixes=('_2021', '_2022'))
merged_df.columns = ["State", "2019", "2020", "2021", "2022"]
merged_df = merged_df.head(52)
#for bls,unemployment rate=poulation-employment/population
merged_df = merged_df.drop(0)
m_df = pd.merge(merged_df, merged_pop, on='State', how='left')
m_df['Population_2022'] = m_df['Population_2022'].str.replace(',', '').astype(int)
m_df['2022'] = m_df['2022'].str.replace(',', '').astype(int)
m_df['Population_2021'] = m_df['Population_2021'].str.replace(',', '').astype(int)
m_df['2021'] = m_df['2021'].str.replace(',', '').astype(int)
m_df['Population_2020'] = m_df['Population_2020'].str.replace(',', '').astype(int)
m_df['2020'] = m_df['2020'].str.replace(',', '').astype(int)
m_df['Population_2019'] = m_df['Population_2019'].str.replace(',', '').astype(int)
m_df['2019'] = m_df['2019'].str.replace(',', '').astype(int)
m_df['Unemployment Rate 2022'] = ((m_df['Population_2022'] - m_df['2022']) / m_df['Population_2022']) * 100
m_df['Unemployment Rate 2021'] = ((m_df['Population_2021'] - m_df['2021']) / m_df['Population_2021']) * 100
m_df['Unemployment Rate 2020'] = ((m_df['Population_2020'] - m_df['2020']) / m_df['Population_2020']) * 100
m_df['Unemployment Rate 2019'] = ((m_df['Population_2019'] - m_df['2019']) / m_df['Population_2019']) * 100
# interactive map for unemployment
us_states_geojson_path = 'C:/Users/thoma/Downloads/gz_2010_us_040_00_500k.json'
gdf_states = gpd.read_file(us_states_geojson_path)
gdf_states.rename(columns={'NAME': 'State'}, inplace=True)
merged_df = pd.merge(gdf_states, m_df, on='State', how='left')
fig = px.choropleth(
merged_df,
geojson=gdf_states.geometry,
locations=gdf_states.index,
color=(merged_df['2022'] - merged_df['2019']) / merged_df['2019'] * 100, # Calculate unemployment rate change
hover_name='State',
hover_data=['2019', '2020', '2021', '2022'],
title='Unemployment Rate Change (2019-2022) by State',
color_continuous_scale="Viridis",
scope="usa"
)
fig.show()
#top five and worst five each year
top_five_2019 = merged_df.nlargest(5, 'Unemployment Rate 2019')[['State', 'Unemployment Rate 2019']]
worst_five_2019 = merged_df.nsmallest(5, 'Unemployment Rate 2019')[['State', 'Unemployment Rate 2019']]
top_five_2020 = merged_df.nlargest(5, 'Unemployment Rate 2020')[['State', 'Unemployment Rate 2020']]
worst_five_2020 = merged_df.nsmallest(5, 'Unemployment Rate 2020')[['State', 'Unemployment Rate 2020']]
top_five_2021 = merged_df.nlargest(5, 'Unemployment Rate 2021')[['State', 'Unemployment Rate 2021']]
worst_five_2021 = merged_df.nsmallest(5, 'Unemployment Rate 2021')[['State', 'Unemployment Rate 2021']]
top_five_2022 = merged_df.nlargest(5, 'Unemployment Rate 2022')[['State', 'Unemployment Rate 2022']]
worst_five_2022 = merged_df.nsmallest(5, 'Unemployment Rate 2022')[['State', 'Unemployment Rate 2022']]
print("Top Five States with Highest Unemployment Rate (2019):")
print(top_five_2019)
print("\nWorst Five States with Lowest Unemployment Rate (2019):")
print(worst_five_2019)
print("\nTop Five States with Highest Unemployment Rate (2020):")
print(top_five_2020)
print("\nWorst Five States with Lowest Unemployment Rate (2020):")
print(worst_five_2020)
print("\nTop Five States with Highest Unemployment Rate (2021):")
print(top_five_2021)
print("\nWorst Five States with Lowest Unemployment Rate (2021):")
print(worst_five_2021)
print("\nTop Five States with Highest Unemployment Rate (2022):")
print(top_five_2022)
print("\nWorst Five States with Lowest Unemployment Rate (2022):")
print(worst_five_2022)
Top Five States with Highest Unemployment Rate (2019):
State Unemployment Rate 2019
38 Mississippi 61.843375
50 West Virginia 61.567829
42 New Mexico 60.098129
20 Arizona 60.036556
21 Arkansas 59.636013
Worst Five States with Lowest Unemployment Rate (2019):
State Unemployment Rate 2019
26 District of Columbia -9.959915
43 North Dakota 44.514095
1 Massachusetts 47.238079
37 Minnesota 48.538752
40 Nebraska 49.209060
Top Five States with Highest Unemployment Rate (2020):
State Unemployment Rate 2020
50 West Virginia 64.028292
38 Mississippi 63.265926
42 New Mexico 62.884474
20 Arizona 61.951119
36 Louisiana 61.669500
Worst Five States with Lowest Unemployment Rate (2020):
State Unemployment Rate 2020
26 District of Columbia -1.861210
43 North Dakota 48.261944
40 Nebraska 51.065185
1 Massachusetts 51.778395
37 Minnesota 52.174519
Top Five States with Highest Unemployment Rate (2021):
State Unemployment Rate 2021
50 West Virginia 63.105938
42 New Mexico 62.347008
38 Mississippi 62.120805
18 Alabama 61.007203
36 Louisiana 60.720641
Worst Five States with Lowest Unemployment Rate (2021):
State Unemployment Rate 2021
26 District of Columbia -8.324155
43 North Dakota 48.322726
1 Massachusetts 50.520515
40 Nebraska 50.819782
37 Minnesota 51.381262
Top Five States with Highest Unemployment Rate (2022):
State Unemployment Rate 2022
50 West Virginia 62.043787
38 Mississippi 61.039769
42 New Mexico 60.590278
18 Alabama 60.071269
44 Oklahoma 59.577467
Worst Five States with Lowest Unemployment Rate (2022):
State Unemployment Rate 2022
26 District of Columbia -11.451869
43 North Dakota 47.218326
1 Massachusetts 48.501212
40 Nebraska 49.999670
37 Minnesota 50.080914
#bls employment rate
merged_df.columns = ["State", "STATE 2019", "STATE 2020", "STATE 2021", "STATE 2022"]
merged_df["STATE 2019"] = pd.to_numeric(merged_df["STATE 2019"].replace({",": ""}, regex=True), errors="coerce")
merged_df["STATE 2020"] = pd.to_numeric(merged_df["STATE 2020"].replace({",": ""}, regex=True), errors="coerce")
merged_df["STATE 2021"] = pd.to_numeric(merged_df["STATE 2021"].replace({",": ""}, regex=True), errors="coerce")
merged_df["STATE 2022"] = pd.to_numeric(merged_df["STATE 2022"].replace({",": ""}, regex=True), errors="coerce")
df_rate_changes_merged = pd.DataFrame()
df_rate_changes_merged["2020-2019"] = ((merged_df["STATE 2020"] - merged_df["STATE 2019"]) / merged_df["STATE 2019"]) * 100
df_rate_changes_merged["2021-2020"] = ((merged_df["STATE 2021"] - merged_df["STATE 2020"]) / merged_df["STATE 2020"]) * 100
df_rate_changes_merged["2022-2021"] = ((merged_df["STATE 2022"] - merged_df["STATE 2021"]) / merged_df["STATE 2021"]) * 100
df_rate_changes_merged["State"] = merged_df["State"]
#interactive plot for bls for unemployment rate change
us_states_geojson_path = 'C:/Users/thoma/Downloads/gz_2010_us_040_00_500k.json'
gdf_states = gpd.read_file(us_states_geojson_path)
gdf_states.rename(columns={'NAME': 'State'}, inplace=True)
merged_df = pd.merge(gdf_states, df_rate_changes_merged, on='State', how='left')
#2020-20
fig = px.choropleth(
merged_df,
geojson=gdf_states.geometry,
locations=gdf_states.index,
color='2020-2019',
hover_name='State',
hover_data=['2020-2019'],
title='Employment Rate Change (2020-2019) by State',
color_continuous_scale="Viridis",
scope="usa"
)
fig.show()
#2021-2020
fig = px.choropleth(
merged_df,
geojson=gdf_states.geometry,
locations=gdf_states.index,
color='2021-2020',
hover_name='State',
hover_data=['2021-2020'],
title='Employment Rate Change (2021-2020) by State',
color_continuous_scale="Viridis",
scope="usa"
)
fig.show()
#2022-2021
fig = px.choropleth(
merged_df,
geojson=gdf_states.geometry,
locations=gdf_states.index,
color='2022-2021',
hover_name='State',
hover_data=['2022-2021'],
title='Employment Rate Change (2022-2021) by State',
color_continuous_scale="Viridis",
scope="usa"
)
fig.show()
# Find top and worst five states for each year
top_five_2019 = df_rate_changes_merged.nlargest(5, "2020-2019")
worst_five_2019 = df_rate_changes_merged.nsmallest(5, "2020-2019")
top_five_2020 = df_rate_changes_merged.nlargest(5, "2021-2020")
worst_five_2020 = df_rate_changes_merged.nsmallest(5, "2021-2020")
top_five_2021 = df_rate_changes_merged.nlargest(5, "2022-2021")
worst_five_2021 = df_rate_changes_merged.nsmallest(5, "2022-2021")
# Plot
for i, (top, worst, year) in enumerate(zip([top_five_2019, top_five_2020, top_five_2021],
[worst_five_2019, worst_five_2020, worst_five_2021],
["2020-2019", "2021-2020", "2022-2021"])):
plt.figure(figsize=(15, 6))
plt.bar(top["State"], top[year], color="green", label="Top 5")
plt.bar(worst["State"], worst[year], color="red", label="Worst 5")
plt.title(f"Top and Worst Five States for Employment Rate Change {year} (in %)")
plt.xlabel("State")
plt.ylabel("Rate Change (%)")
plt.legend()
plt.show()
print(f"Top Five States for Employment Rate Change {year}:")
print(top)
print("\nWorst Five States for Employment Rate Change {year}:")
print(worst)
print("\n" + "-" * 50)
#seperated by region
state_to_region = {
'Alabama': 'South', 'Alaska': 'West', 'Arizona': 'West', 'Arkansas': 'South',
'California': 'West', 'Colorado': 'West', 'Connecticut': 'Northeast', 'Delaware': 'South',
'District of Columbia': 'Northeast', 'Florida': 'South', 'Georgia': 'South', 'Hawaii': 'West',
'Idaho': 'West', 'Illinois': 'Midwest', 'Indiana': 'Midwest', 'Iowa': 'Midwest', 'Kansas': 'Midwest',
'Kentucky': 'South', 'Louisiana': 'South', 'Maine': 'Northeast', 'Maryland': 'South',
'Massachusetts': 'Northeast', 'Michigan': 'Midwest', 'Minnesota': 'Midwest', 'Mississippi': 'South',
'Missouri': 'Midwest', 'Montana': 'West', 'Nebraska': 'Midwest', 'Nevada': 'West',
'New Hampshire': 'Northeast', 'New Jersey': 'Northeast', 'New Mexico': 'West', 'New York': 'Northeast',
'North Carolina': 'South', 'North Dakota': 'Midwest', 'Ohio': 'Midwest', 'Oklahoma': 'South',
'Oregon': 'West', 'Pennsylvania': 'Northeast', 'Rhode Island': 'Northeast', 'South Carolina': 'South',
'South Dakota': 'Midwest', 'Tennessee': 'South', 'Texas': 'South', 'Utah': 'West', 'Vermont': 'Northeast',
'Virginia': 'South', 'Washington': 'West', 'West Virginia': 'South', 'Wisconsin': 'Midwest', 'Wyoming': 'West'
}
df_rate_changes_merged['Region'] = df_rate_changes_merged['State'].map(state_to_region)
df_rate_changes_merged['Region'] = df_rate_changes_merged['State'].map(state_to_region)
df_rate_changes_merged = df_rate_changes_merged.sort_values('Region')
df_rate_changes_merged = df_rate_changes_merged.iloc[:-1]
region_means = df_rate_changes_merged.groupby('Region')[["2020-2019", "2021-2020", "2022-2021"]].mean()
# Overall trend for 2020-2019
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes_merged, x='Region', y='2020-2019')
plt.title('Overall Trend for 2020-2019')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()
# Overall trend for 2021-2020
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes_merged, x='Region', y='2021-2020')
plt.title('Overall Trend for 2021-2020')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()
# Overall trend for 2022-2021
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes_merged, x='Region', y='2022-2021')
plt.title('Overall Trend for 2022-2021')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()
#bea for employment
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
url="https://apps.bea.gov/itable/?ReqID=70&step=1&_gl=1*1ntcawo*_ga*NzUyMDc4MzIzLjE3MDk0MzIyNTM.*_ga_J4698JNNFT*MTcwOTQzMjI1My4xLjEuMTcwOTQzMzExMS42MC4wLjA.#eyJhcHBpZCI6NzAsInN0ZXBzIjpbMSwyOSwyNSwzMSwyNiwyNywzMF0sImRhdGEiOltbIlRhYmxlSWQiLCI2MDAiXSxbIk1ham9yX0FyZWEiLCIwIl0sWyJTdGF0ZSIsWyIwIl1dLFsiQXJlYSIsWyJYWCJdXSxbIlN0YXRpc3RpYyIsWyIxNSJdXSxbIlVuaXRfb2ZfbWVhc3VyZSIsIkxldmVscyJdLFsiWWVhciIsWyIyMDIyIiwiMjAyMSIsIjIwMjAiLCIyMDE5Il1dLFsiWWVhckJlZ2luIiwiLTEiXSxbIlllYXJfRW5kIiwiLTEiXV19"
driver.get(url)
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.XPATH, "//table")))
html_content = driver.page_source
soup = BeautifulSoup(html_content, "html.parser")
tables = soup.find_all("table")
data = []
for table in tables:
rows = table.find_all("tr")
for row in rows:
cells = row.find_all(["th", "td"])
row_data = [cell.get_text(strip=True) for cell in cells]
data.append(row_data)
driver.quit()
df2 = pd.DataFrame(data)
df2 = df2.iloc[1:]
df2 = df2.drop(columns=[0])
#for bea,unemployment rate=poulation-employment/population
df2.columns = ["State", "2019", "2020", "2021", "2022"]
m_df = pd.merge(df2, merged_pop, on='State', how='left')
m_df = m_df.drop(0)
m_df['Population_2022'] = m_df['Population_2022'].str.replace(',', '').astype(int)
m_df['2022'] = m_df['2022'].str.replace(',', '').astype(int)
m_df['Population_2021'] = m_df['Population_2021'].str.replace(',', '').astype(int)
m_df['2021'] = m_df['2021'].str.replace(',', '').astype(int)
m_df['Population_2020'] = m_df['Population_2020'].str.replace(',', '').astype(int)
m_df['2020'] = m_df['2020'].str.replace(',', '').astype(int)
m_df['Population_2019'] = m_df['Population_2019'].str.replace(',', '').astype(int)
m_df['2019'] = m_df['2019'].str.replace(',', '').astype(int)
m_df['Unemployment Rate 2022'] = ((m_df['Population_2022'] - m_df['2022']) / m_df['Population_2022']) * 100
m_df['Unemployment Rate 2021'] = ((m_df['Population_2021'] - m_df['2021']) / m_df['Population_2021']) * 100
m_df['Unemployment Rate 2020'] = ((m_df['Population_2020'] - m_df['2020']) / m_df['Population_2020']) * 100
m_df['Unemployment Rate 2019'] = ((m_df['Population_2019'] - m_df['2019']) / m_df['Population_2019']) * 100
# interactive map for unemployment
us_states_geojson_path = 'C:/Users/thoma/Downloads/gz_2010_us_040_00_500k.json'
gdf_states = gpd.read_file(us_states_geojson_path)
gdf_states.rename(columns={'NAME': 'State'}, inplace=True)
merged_df = pd.merge(gdf_states, m_df, on='State', how='left')
fig = px.choropleth(
merged_df,
geojson=gdf_states.geometry,
locations=gdf_states.index,
color=(merged_df['2022'] - merged_df['2019']) / merged_df['2019'] * 100, # Calculate unemployment rate change
hover_name='State',
hover_data=['2019', '2020', '2021', '2022'],
title='Unemployment Rate Change (2019-2022) by State',
color_continuous_scale="Viridis",
scope="usa"
)
fig.show()
#top five and worst five each year
top_five_2019 = merged_df.nlargest(5, 'Unemployment Rate 2019')[['State', 'Unemployment Rate 2019']]
worst_five_2019 = merged_df.nsmallest(5, 'Unemployment Rate 2019')[['State', 'Unemployment Rate 2019']]
top_five_2020 = merged_df.nlargest(5, 'Unemployment Rate 2020')[['State', 'Unemployment Rate 2020']]
worst_five_2020 = merged_df.nsmallest(5, 'Unemployment Rate 2020')[['State', 'Unemployment Rate 2020']]
top_five_2021 = merged_df.nlargest(5, 'Unemployment Rate 2021')[['State', 'Unemployment Rate 2021']]
worst_five_2021 = merged_df.nsmallest(5, 'Unemployment Rate 2021')[['State', 'Unemployment Rate 2021']]
top_five_2022 = merged_df.nlargest(5, 'Unemployment Rate 2022')[['State', 'Unemployment Rate 2022']]
worst_five_2022 = merged_df.nsmallest(5, 'Unemployment Rate 2022')[['State', 'Unemployment Rate 2022']]
print("Top Five States with Highest Unemployment Rate (2019):")
print(top_five_2019)
print("\nWorst Five States with Lowest Unemployment Rate (2019):")
print(worst_five_2019)
print("\nTop Five States with Highest Unemployment Rate (2020):")
print(top_five_2020)
print("\nWorst Five States with Lowest Unemployment Rate (2020):")
print(worst_five_2020)
print("\nTop Five States with Highest Unemployment Rate (2021):")
print(top_five_2021)
print("\nWorst Five States with Lowest Unemployment Rate (2021):")
print(worst_five_2021)
print("\nTop Five States with Highest Unemployment Rate (2022):")
print(top_five_2022)
print("\nWorst Five States with Lowest Unemployment Rate (2022):")
print(worst_five_2022)
Top Five States with Highest Unemployment Rate (2019):
State Unemployment Rate 2019
50 West Virginia 50.805710
42 New Mexico 46.736525
20 Arizona 46.007724
38 Mississippi 45.993699
21 Arkansas 44.863218
Worst Five States with Lowest Unemployment Rate (2019):
State Unemployment Rate 2019
26 District of Columbia -29.679532
43 North Dakota 23.706208
1 Massachusetts 29.301344
51 Wyoming 29.496906
48 Vermont 30.536917
Top Five States with Highest Unemployment Rate (2020):
State Unemployment Rate 2020
50 West Virginia 52.887125
42 New Mexico 49.222719
20 Arizona 47.179340
38 Mississippi 46.969868
2 Michigan 45.941814
Worst Five States with Lowest Unemployment Rate (2020):
State Unemployment Rate 2020
26 District of Columbia -21.501902
43 North Dakota 26.768403
51 Wyoming 31.017968
47 South Dakota 32.089713
40 Nebraska 32.613335
Top Five States with Highest Unemployment Rate (2021):
State Unemployment Rate 2021
50 West Virginia 51.512682
42 New Mexico 48.359994
18 Alabama 45.048976
38 Mississippi 44.891041
2 Michigan 44.266816
Worst Five States with Lowest Unemployment Rate (2021):
State Unemployment Rate 2021
26 District of Columbia -29.801955
43 North Dakota 26.726180
51 Wyoming 27.362332
47 South Dakota 30.139964
23 Colorado 31.368554
Top Five States with Highest Unemployment Rate (2022):
State Unemployment Rate 2022
50 West Virginia 49.829198
42 New Mexico 46.081282
18 Alabama 43.441790
38 Mississippi 43.057499
21 Arkansas 42.358988
Worst Five States with Lowest Unemployment Rate (2022):
State Unemployment Rate 2022
26 District of Columbia -34.676237
43 North Dakota 24.567122
51 Wyoming 24.909139
23 Colorado 28.287721
1 Massachusetts 28.354875
# for bea, unemployment change rate
df2.columns = ["state", "2019", "2020", "2021", "2022"]
df2 = df2[(df2["state"] != "GeoName") & (df2["state"] != "state")]
for year in ["2019", "2020", "2021", "2022"]:
df2[year] = df2[year].astype(str).str.replace(",", "")
df2["2019"] = pd.to_numeric(df2["2019"], errors="coerce")
df2["2020"] = pd.to_numeric(df2["2020"], errors="coerce")
df2["2021"] = pd.to_numeric(df2["2021"], errors="coerce")
df2["2022"] = pd.to_numeric(df2["2022"], errors="coerce")
df_rate_changes = pd.DataFrame()
for year in ["2020", "2021", "2022"]:
previous_year = str(int(year) - 1)
new_column_name = f"{year}-{previous_year}"
df_rate_changes[new_column_name] = ((df2[year] - df2[previous_year]) / df2[previous_year]) * 100
df_rate_changes["state"] = df2["state"]
df_rate_changes = df_rate_changes.drop(df_rate_changes.index[0])
#interactive plot for bea
#2020-2019
us_states_geojson_path = 'C:/Users/thoma/Downloads/gz_2010_us_040_00_500k.json'
gdf_states = gpd.read_file(us_states_geojson_path)
gdf_states.rename(columns={'NAME': 'state'}, inplace=True)
merged_df = pd.merge(gdf_states, df_rate_changes, on='state', how='left')
fig = px.choropleth(
merged_df,
geojson=gdf_states.geometry,
locations=gdf_states.index,
color='2020-2019',
hover_name='state',
hover_data=['2020-2019'],
title='Employment Rate Change (2020-2019) by State',
color_continuous_scale="Viridis",
scope="usa"
)
fig.show()
#2021-2020
fig = px.choropleth(
merged_df,
geojson=gdf_states.geometry,
locations=gdf_states.index,
color='2021-2020',
hover_name='state',
hover_data=['2021-2020'],
title='Employment Rate Change (2021-2020) by State',
color_continuous_scale="Viridis",
scope="usa"
)
fig.show()
#2022-2021
fig = px.choropleth(
merged_df,
geojson=gdf_states.geometry,
locations=gdf_states.index,
color='2022-2021',
hover_name='state',
hover_data=['2022-2021'],
title='Employment Rate Change (2022-2021) by State',
color_continuous_scale="Viridis",
scope="usa"
)
fig.show()
#region for bea
df_rate_changes['Region'] = df_rate_changes['state'].map(state_to_region)
df_rate_changes= df_rate_changes.sort_values('Region')
df_rate_changes = df_rate_changes.iloc[:-1]
region_means = df_rate_changes.groupby('Region')[["2020-2019", "2021-2020", "2022-2021"]].mean()
# Overall trend for 2020-2019
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes, x='Region', y='2020-2019')
plt.title('Overall Trend for 2020-2019')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()
# Overall trend for 2021-2020
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes, x='Region', y='2021-2020')
plt.title('Overall Trend for 2021-2020')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()
# Overall trend for 2022-2021
plt.figure(figsize=(12, 6))
sns.lineplot(data=df_rate_changes, x='Region', y='2022-2021')
plt.title('Overall Trend for 2022-2021')
plt.ylabel('Employment Rate Change')
plt.xlabel('Region')
plt.xticks(rotation=45)
plt.show()
#top and worst five
for year in ["2020-2019", "2021-2020", "2022-2021"]:
sorted_df = df_rate_changes.sort_values(by=year, ascending=False)
top_five = sorted_df.head(5)
worst_five = sorted_df.tail(5)
print(f"\nTop Five States in {year}:\n{top_five.iloc[:, [0, -1]]}")
print(f"\nWorst Five States in {year}:\n{worst_five.iloc[:, [0, -1]]}")
fig, ax = plt.subplots(figsize=(18, 6))
ax.bar(top_five["state"], top_five[year], label="Top Five", color="green")
ax.bar(worst_five["state"], worst_five[year], label="Worst Five", color="red")
ax.set_title(f"Top and Worst Five Employment Rate Changes for U.S. States ({year})")
ax.set_xlabel("State")
ax.set_ylabel("Employment Rate Change (%)")
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))
plt.show()